Explore CMIP6 data on Casper¶
Ensure you have the required libraries installed, these will make it much easier to work with the data
! pip install netcdf4 xarray[io] cartopy nc-time-axis
import pandas as pd
import xarray as xr
import numpy as np
import os.path
import matplotlib.pyplot as plt
# Useful for plotting maps
import cartopy.crs as ccrs
# This can be useful for working with multiple processors - to be explored later on
# from dask.distributed import Client, LocalCluster
Output data is in /glade/collections/cmip/CMIP6/{activity}/NCC/NorESM2-LM/{experiment}
You can also find other model data here, in particular the NCAR model:
Example path: /glade/collections/cmip/CMIP6/DAMIP/NCAR/CESM2/hist-aer/r1i1p1f1/Amon/tas/gn/latest/*.nc
Input data is in: /glade/p/cesmdata/cseg/inputdata/atm/cam/chem/emis/
The model names are not very obvious but you can either google them, ask ChatGPT, or look them up in these structured dictionaries: https://github.com/PCMDI/cmip6-cmor-tables/tree/main/Tables (which can be queried with e.g. Pandas)
def get_MIP(experiment):
"""
Utility function to get teh activity associated with a particular experiment
"""
if experiment == 'ssp245-covid':
return 'DAMIP'
elif experiment == 'ssp370-lowNTCF':
return 'AerChemMIP'
elif experiment.startswith('ssp'):
return 'ScenarioMIP'
elif experiment.startswith('hist-'):
return 'DAMIP'
else:
return 'CMIP'
def get_data(variable, experiment, member):
"""
Read a particular CMIP6 (Amon) variable from NorESM2
"""
import glob
files = glob.glob(f"/glade/collections/cmip/CMIP6/{get_MIP(experiment)}/NCC/NorESM2-LM/{experiment}/{member}/Amon/{variable}/gn/v20190815/{variable}/*.nc")
return xr.open_mfdataset(files)[variable]
tas = get_data('tas', 'historical', 'r1i1p1f1')
Note, the ensemble member format:
r for realization, i for initialization, p for physics, and f for forcing
We're only interested in different realizations in this project, so try different r numbers but keep the rest the same: E.g.: r1i1p1f1, r2i1p1f1, r3i1p1f1
# When averaging gridded data on a sphere, we need to account for the fact that the values near the poles have less area
weights = np.cos(np.deg2rad(tas.lat))
weights.name = "weights"
tas_timeseries = tas.weighted(weights).mean(['lat', 'lon'])
tas_timeseries.plot()
[<matplotlib.lines.Line2D at 0x14afa2bde800>]
# Plot a map of the average temperature between 1850-1900
tas.sel(time=slice('1850','1900')).mean('time').plot(
transform=ccrs.PlateCarree(), # This is the projection the data is stored as
subplot_kws={"projection": ccrs.PlateCarree()}, # This describes the projection to plot onto (which happens to be the projection the data is already in so no transformation is needed in this case)
)
# Feel free to explore other projections here: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
plt.gca().coastlines()
<cartopy.mpl.feature_artist.FeatureArtist at 0x14afe6b1f160>
Week 3 Tasks - Oct 2024¶
1. Plot a map of the average global temperature between 2005-2015.¶
# Plot a map of the average temperature between 2005-2015
tas.sel(time=slice('2005','2015')).mean('time').plot(
transform=ccrs.PlateCarree(), # This is the projection the data is stored as
subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)
# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
plt.gca().coastlines()
plt.title('Average Global Temperature 2005-2015', pad=10)
Text(0.5, 1.0, 'Average Global Temperature 2005-2015')
2. Plot a map of the difference in global temperature from 1850-1900 to 2005-2015 with appropriate colorbar and title.¶
# get the difference in global temperature from 1850-1900 to 2005-2015
diff_tas = tas.sel(time=slice('2005','2015')).mean('time') - tas.sel(time=slice('1850','1900')).mean('time')
# Plot a map of the difference in global temperature from 1850-1900 to 2005-2015
diff_tas.plot(
transform=ccrs.PlateCarree(), # This is the projection the data is stored as
subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)
# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
plt.gca().coastlines()
plt.title('Difference in Global Temperature\nfrom 1850-1900 to 2005-2015', pad=20)
Text(0.5, 1.0, 'Difference in Global Temperature\nfrom 1850-1900 to 2005-2015')
3. Plot similar maps for precipitation and daily maximium temperature.¶
# get and store precipitation data
pr = get_data('pr', 'historical', 'r1i1p1f1')
# Plot a map of the average precipitation between 2005-2015
pr.sel(time=slice('2005','2015')).mean('time').plot(
transform=ccrs.PlateCarree(), # This is the projection the data is stored as
subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)
plt.gca().coastlines()
plt.title('Average Global Precipitation 2005-2015', pad=20)
Text(0.5, 1.0, 'Average Global Precipitation 2005-2015')
# get the difference in global temperature from 1850-1900 to 2005-2015
diff_pr = pr.sel(time=slice('2005','2015')).mean('time') - pr.sel(time=slice('1850','1900')).mean('time')
# Plot a map of the difference in global precipitation from 1850-1900 to 2005-2015
diff_pr.plot(
transform=ccrs.PlateCarree(), # This is the projection the data is stored as
subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)
# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
plt.gca().coastlines()
plt.title('Difference in Global Precipitation\nfrom 1850-1900 to 2005-2015', pad=20)
Text(0.5, 1.0, 'Difference in Global Precipitation\nfrom 1850-1900 to 2005-2015')
# write a function for getting tasmax
def get_tasmax(experiment, member):
import glob
# files = glob.glob("/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/historical/r1i1p1f1/day/tasmax/")
files = glob.glob(f"/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/{experiment}/{member}/day/tasmax/**/*.nc", recursive=True)
# "/glade/collections/cmip/CMIP6/{get_MIP(experiment)}/NCC/NorESM2-LM/{experiment}/{member}/Amon/{variable}/gn/v20190815/{variable}/*.nc"
return xr.open_mfdataset(files)['tasmax']
# get and store daily maximum temperature data
tasmax = get_tasmax('historical', 'r1i1p1f1')
# Plot a map of the daily maximum temperature between 2005-2015
tasmax.sel(time=slice('2005','2015')).mean('time').plot(
transform=ccrs.PlateCarree(), # This is the projection the data is stored as
subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)
plt.gca().coastlines()
plt.title('Average Daily Maximum Temperature 2005-2015', pad=20)
Text(0.5, 1.0, 'Average Daily Maximum Temperature 2005-2015')
# get the difference in daily maximum temperature from 1850-1900 to 2005-2015
diff_tasmax = tasmax.sel(time=slice('2005','2015')).mean('time') - tasmax.sel(time=slice('1850','1900')).mean('time')
# Plot a map of the difference in daily maximum temperature from 1850-1900 to 2005-2015
diff_tasmax.plot(
transform=ccrs.PlateCarree(), # This is the projection the data is stored as
subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)
# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
plt.gca().coastlines()
plt.title('Difference in Average Daily Maximum Temperature\nfrom 1850-1900 to 2005-2015', pad=20)
Text(0.5, 1.0, 'Difference in Average Daily Maximum Temperature\nfrom 1850-1900 to 2005-2015')
4. Plot a global average time-series of the variables above. Optionally include multiple ensemble members (or an indication of the spread across ensemble members).¶
# Plot global average temperature time-series
tas_timeseries = tas.weighted(weights).mean(['lat', 'lon'])
tas_timeseries.plot()
plt.title('Average Global Temperature')
Text(0.5, 1.0, 'Average Global Temperature')
# Plot global average precipitation time-series
pr_timeseries = pr.weighted(weights).mean(['lat', 'lon'])
pr_timeseries.plot()
plt.title('Average Global Precipitation')
Text(0.5, 1.0, 'Average Global Precipitation')
tasmax_month_avg = tasmax.resample(time='ME').mean()
# Plot global average daily maximum temperature time-series
tasmax_timeseries = tasmax_month_avg.weighted(weights).mean(['lat', 'lon'])
tasmax_timeseries.plot()
plt.title('Average Global Daily Maximum Temperature')
Text(0.5, 1.0, 'Average Global Daily Maximum Temperature')
5. Pick another variable and produce similar plots.¶
# write a function for getting tasmax
def get_tasmin(experiment, member):
import glob
# files = glob.glob("/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/historical/r1i1p1f1/day/tasmax/")
files = glob.glob(f"/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/{experiment}/{member}/day/tasmin/**/*.nc", recursive=True)
# "/glade/collections/cmip/CMIP6/{get_MIP(experiment)}/NCC/NorESM2-LM/{experiment}/{member}/Amon/{variable}/gn/v20190815/{variable}/*.nc"
return xr.open_mfdataset(files)['tasmin']
tasmin = get_tasmin('historical', 'r1i1p1f1')
# Plot a map of the daily minimum temperature between 2005-2015
tasmin.sel(time=slice('2005','2015')).mean('time').plot(
transform=ccrs.PlateCarree(), # This is the projection the data is stored as
subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)
plt.gca().coastlines()
plt.title('Average Daily Minimum Temperature 2005-2015', pad=20)
Text(0.5, 1.0, 'Average Daily Minimum Temperature 2005-2015')
# get the difference in daily minimum temperature from 1850-1900 to 2005-2015
diff_tasmin = tasmin.sel(time=slice('2005','2015')).mean('time') - tasmin.sel(time=slice('1850','1900')).mean('time')
# Plot a map of the difference in daily maximum temperature from 1850-1900 to 2005-2015
diff_tasmin.plot(
transform=ccrs.PlateCarree(), # This is the projection the data is stored as
subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)
# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
plt.gca().coastlines()
plt.title('Difference in Average Daily Minimum Temperature\nfrom 1850-1900 to 2005-2015', pad=20)
Text(0.5, 1.0, 'Difference in Average Daily Minimum Temperature\nfrom 1850-1900 to 2005-2015')
tasmin_month_avg = tasmin.resample(time='ME').mean()
# Plot global average daily minimum temperature time-series
tasmin_timeseries = tasmin_month_avg.weighted(weights).mean(['lat', 'lon'])
tasmin_timeseries.plot()
plt.title('Average Global Daily Minimum Temperature')
Text(0.5, 1.0, 'Average Global Daily Minimum Temperature')
EDA for Project¶
# Function for plotting maps and time series for specified variable
output_folder = './eda'
def average_variable_eda(var, var_name, title):
var_1850_1900 = var.sel(time=slice('1850', '1900')).mean('time')
fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
c = var_1850_1900.plot(
ax=ax,
transform=ccrs.PlateCarree(),
cbar_kwargs={'orientation': 'horizontal'} # Make the color bar horizontal
)
plt.title(f"Average {title} Between 1850-1900")
ax.coastlines()
plt.subplots_adjust(bottom=0.15) # Adjust as needed
output_filename = f"avg_{var_name}_1850_1900.png"
plt.savefig(f"{output_folder}/{output_filename}")
plt.show()
var_2005_2005 = var.sel(time=slice('2005', '2015')).mean('time')
fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
c = var_2005_2005.plot(
ax=ax,
transform=ccrs.PlateCarree(),
cbar_kwargs={'orientation': 'horizontal'} # Make the color bar horizontal
)
plt.title(f"Average {title} Between 2005-2015")
ax.coastlines()
plt.subplots_adjust(bottom=0.15) # Adjust as needed
output_filename = f"avg_{var_name}_2005_2015.png"
plt.savefig(f"{output_folder}/{output_filename}")
plt.show()
difference = var.sel(time=slice('2005', '2015')).mean('time') - var.sel(time=slice('1850', '1900')).mean('time')
#More recent - past
fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()})
c = difference.plot(
ax=ax,
transform=ccrs.PlateCarree(),
cbar_kwargs={'orientation': 'horizontal'} # Make the color bar horizontal
)
plt.title(f"Difference in {title} From 1850-1900 to 2005-2015")
ax.coastlines()
plt.subplots_adjust(bottom=0.15) # Adjust as needed
output_filename = f"diff_avg_{var_name}.png"
plt.savefig(f"{output_folder}/{output_filename}")
plt.show()
weights = np.cos(np.deg2rad(var.lat))
weights.name = "weights"
var_timeseries = var.weighted(weights).mean(['lat', 'lon'])
var_timeseries.plot()
plt.title(f"{title} Timeseries")
output_filename = f"{var_name}_timeseries.png"
plt.savefig(f"{output_folder}/{output_filename}")
plt.show()
return
experiment = 'historical'
member = 'r2i1p1f1'
variable = 'clt'
import glob
# files = glob.glob("/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/historical/r1i1p1f1/day/tasmax/")
# files = glob.glob(f"/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/{experiment}/{member}/*/{variable}/**/*.nc", recursive=True)
files = glob.glob(f"/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/{experiment}/{member}/day/{variable}/**/**", recursive=True)
# "/glade/collections/cmip/CMIP6/{get_MIP(experiment)}/NCC/NorESM2-LM/{experiment}/{member}/Amon/{variable}/gn/v20190815/{variable}/*.nc"
files
['/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/historical/r2i1p1f1/day/clt/']
import xarray as xr
import glob
import os
def find_variables_in_path(base_path):
# Search for all variable subdirectories (e.g., 'tas', 'pr', etc.) in the 'Amon' folder
variable_dirs = glob.glob(f"{base_path}/Amon/*/")
print("Found variables:")
for variable_dir in variable_dirs:
# Extract the variable name from the directory path
variable_name = os.path.basename(os.path.normpath(variable_dir))
print(variable_name)
# Optionally, you can explore the contents of one of the variable files
nc_files = glob.glob(f"{variable_dir}/gn/latest/*.nc")
if nc_files:
# Open the first file and list its variables
ds = xr.open_dataset(nc_files[0])
# print(f" Variables inside {variable_name}:")
# for var in ds.variables:
# print(f" {var}")
else:
print(f" No .nc files found for {variable_name}")
# Path to where you want to search (adjusting to go up one directory from 'tas')
base_path = "/glade/collections/cmip/CMIP6/DAMIP/NCAR/CESM2/hist-aer/r1i1p1f1"
find_variables_in_path(base_path)
# Function for getting CMIP data
def get_CMIP(variable, experiment, member):
import glob
files = glob.glob(f"/glade/collections/cmip/CMIP6/CMIP/NCC/NorESM2-LM/{experiment}/{member}/day/{variable}/**/*.nc", recursive=True)
return xr.open_mfdataset(files)[variable]
# Function for getting DAMIP data
def get_DAMIP(variable, member):
import glob
files = glob.glob(f'/glade/collections/cmip/CMIP6/DAMIP/NCAR/CESM2/*/{member}/*/{variable}/gn/*/*.nc', recursive=True)
return xr.open_mfdataset(files)[variable]
hfss = get_hfss('hfss', 'historical', 'r2i1p1f1')
hfss
# Plot a map of the difference in daily maximum temperature from 1850-1900 to 2005-2015
hfss.plot(
transform=ccrs.PlateCarree(), # This is the projection the data is stored as
subplot_kws={"projection": ccrs.Mollweide()}, # This describes the projection to plot onto
)
# Mollweide projection from: https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html
plt.gca().coastlines()
plt.title('Difference in Surface Sensible Heat Flux\nfrom 1850-1900 to 2005-2015', pad=20)
average_variable_eda(hfss, 'hfss', 'Surface Sensible Heat Flux')
clt = get_DAMIP('clt', 'r1i1p1f1')
average_variable_eda(clt, 'clt', 'Total Cloud Fraction')
ci = get_DAMIP('ci', 'r1i1p1f1')
average_variable_eda(ci, 'ci', 'Sea Ice Concentration')
clivi = get_DAMIP('clivi', 'r1i1p1f1')
average_variable_eda(clivi, 'clivi', 'Column-Integrated Ice Water')
cl = get_DAMIP('cl', 'r1i1p1f1')
average_variable_eda(cl.mean(dim='lev'), 'cl', 'Column-Integrated Ice Water')
co2mass = get_DAMIP('co2mass', 'r1i1p1f1')
rldscs = get_DAMIP('rldscs', 'r1i1p1f1')
average_variable_eda(rldscs, 'rldscs', 'Clear-Sky Surface Downwelling Longwave Radiation')
clwvi = get_DAMIP('clwvi', 'r1i1p1f1')
average_variable_eda(clwvi, 'clwvi', 'Column-Integrated Liquid Water')
rlutcs = get_DAMIP('rlutcs', 'r1i1p1f1')
average_variable_eda(rlutcs, 'rlutcs', 'Clear-Sky Upwelling Longwave Radiation')